#Start by cleaning up the hackers diet excel sheet (removing unhelpful rows, etc.)
import pandas as pd
df = pd.read_excel("hackdiet_db.sv.xlsx")
df_clean_columns = df.rename(columns={"2020-04-12T18:35:41Z": "weight", "Epoch": "weigh_in_date", "Unnamed: 2": "C" , "Unnamed: 3": "D", "Unnamed: 4": "E", "Unnamed: 5": "F", "Unnamed: 6": "G", "Unnamed: 7": "H"}, errors="raise")
df_clean_columns = df_clean_columns.dropna(subset=['weight'])
df_clean_columns_rows = df_clean_columns[df_clean_columns['weigh_in_date'] != 'Date']
df_clean_columns_rows_2 = df_clean_columns_rows[df_clean_columns_rows['weigh_in_date'] != 'StartTrend']
df_clean_columns_rows_3 = df_clean_columns_rows_2[df_clean_columns_rows_2['weigh_in_date'] != 'User']
df_clean_columns_rows_4 = df_clean_columns_rows_3[df_clean_columns_rows_3['weigh_in_date'] != 'Preferences']
df_clean_columns_rows_5 = df_clean_columns_rows_4[df_clean_columns_rows_4['weigh_in_date'] != 'Diet-Plan']
df_clean_columns_rows_6 = df_clean_columns_rows_5.drop(columns=['C', 'D', 'E', 'F','G','H'], axis=1)
df_clean_columns_rows_6["weight"] = pd.to_numeric(df_clean_columns_rows_6["weight"], downcast="float")
# we need to convert kg to lbs; if <90. then weight *= 2.20462
df_clean_columns_rows_6.loc[df_clean_columns_rows_6['weight'] <= 90., 'weight'] *= 2.20462
# print to Excel
df_clean_columns_rows_6.to_excel("HackerDietClean.xlsx", engine='xlsxwriter', index=False)
print("done without errors")
df_clean_columns_rows_6
#Do MFP
import pandas as pd_mfp
df_mfp = pd_mfp.read_excel("mfp.xlsx")
df_clean_columns_mfp = df_mfp.rename(columns={"Date": "weigh_in_date", "Body Fat %": "B", "Fitbit steps": "C" , "Waist": "D", "Weight": "weight"}, errors="raise")
df_clean_columns_rows_mfp_2 = df_clean_columns_mfp.drop(columns=['B', 'C', 'D'], axis=1)
df_clean_columns_rows_mfp_2 = df_clean_columns_rows_mfp_2.dropna(subset=['weight'])
df_clean_columns_rows_mfp_2["weight"] = pd_mfp.to_numeric(df_clean_columns_rows_mfp_2["weight"], downcast="float")
df_clean_columns_rows_mfp_2.to_excel("MFPClean.xlsx", engine='xlsxwriter', index=False)
print("done without errors")
df_clean_columns_rows_mfp_2
#Do TrendWeight
import pandas as pd_tw
df_tw = pd_tw.read_excel("TrendWeight.xlsx")
df_clean_columns_tw = df_tw.rename(columns={"Date": "weigh_in_date", "WeightActual": "weight", "WeightTrend": "C", "WeightIsInterpolated": "D", "FatMassActual": "E", "FatMassTrend.1": "F", "FatPercentActual": "G", "FatMassTrend": "H", "LeanMassActual": "I", "LeanMassTrend": "J", "FatMassIsInterpolated": "K"}, errors="raise")
df_clean_columns_rows_tw_2 = df_clean_columns_tw.drop(columns=['C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K'], axis=1)
df_clean_columns_rows_tw_2 = df_clean_columns_rows_tw_2.dropna(subset=['weight'])
df_clean_columns_rows_tw_2["weight"] = pd_tw.to_numeric(df_clean_columns_rows_tw_2["weight"], downcast="float")
df_clean_columns_rows_tw_2.to_excel("TrendWeightClean.xlsx", engine='xlsxwriter', index=False)
print("done without errors")
df_clean_columns_rows_tw_2
#concatinate all MFP and Hackers Diet sheets so there are no duplicates
import pandas as pd_clean_join
mfp_clean_join = pd_clean_join.read_excel("MFPClean.xlsx")
hd_clean_join = pd_clean_join.read_excel("HackerDietClean.xlsx")
tw_clean_join = pd_clean_join.read_excel("TrendWeightClean.xlsx")
outer_join_first = mfp_clean_join.merge(hd_clean_join, how="outer")
outer_join_second = outer_join_first.merge(tw_clean_join, how="outer")
#Extra effort to make sure the outer join worked appropriately
outer_join_second.drop_duplicates()
#Write the concatenated file to disk
outer_join_second.to_excel("ThreeCleanSheets.xlsx",index=False)
#Now let's analyze ourself!
import pandas as pd_a
from fbprophet import Prophet
import numpy as np
data = pd_a.read_excel("ThreeCleanSheets.xlsx")
data["weigh_in_date"] = pd_a.to_datetime(data["weigh_in_date"])
data = data.sort_values(by="weigh_in_date")
data.set_index('weigh_in_date')['weight'].plot();
#data.describe()
data['weight'].plot(kind='hist', bins=20)
data.set_index('weigh_in_date')['weight'].plot(kind='kde');
df = pd_a.DataFrame({'x': data["weight"], 'y': data["weight"]})
ax = df.plot.hexbin(x='x', y='y', gridsize=12)
data.describe(percentiles=[0,1/10, 2/10, 3/10,4/10,5/10,6/10,7/10,8/10,9/10])
#predict the dad bod of 200 lbs.
# Prophet requires columns ds (Date) and y (value)
m = data.rename(columns={'weigh_in_date': 'ds', 'weight': 'y'})
n = Prophet()
n.fit(m)
future = n.make_future_dataframe(periods=1825)
future.tail()
forecast = n.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].tail()
fig1 = n.plot(forecast)
fig2 = n.plot_components(forecast)
# Make the prophet model and fit on the data
#gm_prophet = fbprophet.Prophet(changepoint_prior_scale=0.15, daily_seasonality=False)
#gm_prophet.fit(gm)
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()
fig = plot_plotly(n, forecast) # This returns a plotly Figure
py.iplot(fig)